from pdf2docx import parse

# Source PDF and the DOCX path handed to the converter as its output target.
# NOTE: both are absolute, machine-specific paths; edit them to convert a
# different document.
pdf_file = '/home/FAST_DATA_MIRROR/Langchain-Chatchat-master/tabel_and_images/1904.02701v1.pdf'
docx_file = '/home/FAST_DATA_MIRROR/Langchain-Chatchat-master/tabel_and_images/1904.02701v1.docx'

# Convert the PDF to DOCX.
parse(pdf_file, docx_file)